Aim: Detect aberrantly expressed genes and pathways in ATG7 patients
GenomicAlignment::summarizedOverlaps
cd /tools
git clone https://github.com/broadinstitute/gtex-pipeline
chmod -R 775 gtex-pipeline
# generate the genes gtf
/tools/gtex-pipeline/gene_model/collapse_annotation.py \
/data/references/ensembl/gtf_gff3/v97/Homo_sapiens.GRCh38.97.gtf \
/data/references/ensembl/gtf_gff3/v97/Homo_sapiens.GRCh38.97.genes.gtfsource("/home/dzhang/projects/ATG7_rob_t_analysis/scripts/aberrant_expression/get_gene_count_RSE.R")load("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/50_controls/gene_count_ods.rda")
colData(gene_count_ods) %>%
as.data.frame() %>%
as_tibble()load("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/50_controls/gene_count_ods.rda")
which_ATG7 <- which(colData(gene_count_ods)[["gene_name"]] == "ATG7")
which_control <- which(str_detect(colData(gene_count_ods)[["samp_id_tidy"]], "control"))
ATG7_only_ods <- gene_count_ods[names(gene_count_ods) == "ENSG00000197548" , c(which_ATG7, which_control)]
assays(ATG7_only_ods)[["fpkm"]] %>%
as.data.frame() %>%
gather(key = "samp_id", value = "fpkm") %>%
mutate(family = c(2, 2, 3, 4, 1, 1, "control", "control")) %>%
ggplot(aes(x = samp_id, y = fpkm, fill = as.character(family))) +
geom_col(colour = "black") +
scale_x_discrete(name = "Sample ID") +
scale_y_continuous(name = "FPKM") +
scale_fill_manual(name = "Family",
values = ggpubr::get_palette("npg", 5)) +
theme_bw()load("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/50_controls/ATG7_res_all.rda")
load("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/50_controls/gene_count_corrected_example.rda")
load("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/50_controls/gene_count_ods.rda")knitr::include_graphics("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/50_controls/FPKM.png")knitr::include_graphics("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/50_controls/expressed_genes.png")knitr::include_graphics("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/50_controls/count_cor_pre_correction.png")knitr::include_graphics("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/50_controls/count_gene_sample_pre_correction.png")Here, I have included one example. In reality, there's plots like these for each ATG7 sample as I have re-run each ATG7 with the other ATG7 samples removed.
knitr::include_graphics("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/50_controls/count_cor_post_correction_M0920.18.png")knitr::include_graphics("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/50_controls/count_gene_sample_post_correction_M0920.18.png")which_ATG7 <- which(colData(gene_count_ods)[["gene_name"]] == "ATG7")
ATG7_samp_ids <- colData(gene_count_ods)[["samp_id_tidy"]][which_ATG7] %>%
as.character()
stopifnot(all(ATG7_samp_ids %in% ATG7_res_all$sampleID))
ATG7_res_ATG7_only <- ATG7_res_all %>%
filter(sampleID %in% ATG7_samp_ids) %>%
mutate(padjust = p.adjust(pValue, method = "fdr")) %>%
dplyr::select(Description, everything())ATG7_res_ATG7_only_signif <- ATG7_res_ATG7_only %>%
filter(padjust <= 0.05) %>%
dplyr::select(Description, everything())
ATG7_res_ATG7_only_signifATG7_res_ATG7_only_signif$sampleID %>% table()## .
## M0920.18 M0921.18 M1111.19 M1716.19 M1856.17 S2557
## 7 341 12 17 38 687
ATG7_res_ATG7_only_signif %>%
filter(Description == "ATG7")ATG7_res_ATG7_only_signif %>%
dplyr::count(Description) %>%
filter(n > 1) %>%
arrange(desc(n))ATG7_res_all %>%
filter(str_detect(sampleID, "control")) %>%
mutate(padjust = p.adjust(pValue, method = "fdr")) %>%
filter(padjust <= 0.05)signif_genes_remaining_samp <- ATG7_res_all %>%
filter(!(sampleID %in% ATG7_samp_ids)) %>%
mutate(padjust = p.adjust(pValue, method = "fdr")) %>%
filter(padjust <= 0.05) %>%
.[["Description"]]
mean(unique(ATG7_res_ATG7_only_signif$Description) %in% unique(signif_genes_remaining_samp))## [1] 0.1444867
hits_gprofiler <-
gprofiler2::gost(query = unique(ATG7_res_ATG7_only_signif %>%
arrange(padjust) %>%
.[["geneID"]]),
organism = "hsapiens",
ordered_query = F,
significant = T,
user_threshold = 0.05,
correction_method = c("g_SCS"),
domain_scope = c("custom"),
custom_bg = unique(ATG7_res_all$geneID),
sources = c("GO", "KEGG", "REAC"),
evcodes = F)
gprofiler2::gostplot(hits_gprofiler)library(KEGGREST)
query_kegg_pathway <- function(pathway_name, organism = "hsa"){
# get all pathways associated with humans
all_pathways <- keggList("pathway", organism)
# filter to only selected pathway and get information about it
selected_pathway <- all_pathways[str_detect(all_pathways, pathway_name)]
print(str_c("Getting info for the pathways: ", unname(selected_pathway) %>% str_c(collapse = ", ")))
selected_pathway_info <- keggGet(names(selected_pathway))
return(selected_pathway_info)
}
get_pathway_gene_info <- function(kegg_pathway_info, organism = "hsa"){
pathway_gene_info_all <- tibble()
for(i in 1:length(kegg_pathway_info)){
odds <- (1:length(kegg_pathway_info[[i]]$GENE))[(1:length(kegg_pathway_info[[i]]$GENE) %% 2) != 0]
evens <- (1:length(kegg_pathway_info[[i]]$GENE))[(1:length(kegg_pathway_info[[i]]$GENE) %% 2) == 0]
pathway_gene_info <-
tibble(pathway_kegg_code = kegg_pathway_info[[i]]$ENTRY,
pathway_name = kegg_pathway_info[[i]]$NAME,
organism = organism,
kegg_gene_entry = kegg_pathway_info[[i]]$GENE[odds],
gene = kegg_pathway_info[[i]]$GENE[evens]) %>%
separate(col = "gene", into = c("gene_name", "gene_desc"), sep = "; ")
pathway_gene_info_all <- pathway_gene_info_all %>% bind_rows(pathway_gene_info)
}
# for(i in 1:nrow(pathway_gene_info)){
#
# gene_info <- keggGet(str_c(organism, ":", pathway_gene_info$kegg_gene_entry)[i])
# pathway_gene_info$gene_identifiers[i] <- gene_info[[1]]$DBLINKS %>% str_c(collapse = ";")
#
# }
return(pathway_gene_info_all)
}
autophagy_kegg_info <- query_kegg_pathway(pathway_name = "Autophagy", organism = "hsa")## [1] "Getting info for the pathways: Autophagy - other - Homo sapiens (human), Autophagy - animal - Homo sapiens (human)"
autophagy_gene_info <- get_pathway_gene_info(kegg_pathway_info = autophagy_kegg_info, organism = "hsa")
autophagy_gene_info$gene_name %>% unique()## [1] "RPTOR" "MTOR" "MLST8" "ULK2" "ATG13" "ATG101"
## [7] "IGBP1" "PPP2CA" "PPP2CB" "ATG9B" "ATG9A" "ATG2A"
## [13] "ATG2B" "WIPI2" "WIPI1" "BECN1" "BECN2" "PIK3C3"
## [19] "PIK3R4" "ATG12" "ATG5" "ATG16L1" "ATG7" "ATG10"
## [25] "ATG3" "GABARAP" "GABARAPL1" "GABARAPL2" "ATG4A" "ATG4B"
## [31] "ATG4C" "ATG4D" "INS" "IGF1R" "IRS1" "IRS2"
## [37] "IRS4" "PIK3CA" "PIK3CD" "PIK3CB" "PIK3R1" "PIK3R2"
## [43] "PIK3R3" "PDPK1" "AKT1" "AKT2" "AKT3" "PTEN"
## [49] "HRAS" "KRAS" "NRAS" "MRAS" "RRAS" "RRAS2"
## [55] "RAF1" "MAP2K1" "MAP2K2" "MAPK1" "MAPK3" "HIF1A"
## [61] "DDIT4" "TSC2" "TSC1" "BNIP3" "RHEB" "DEPTOR"
## [67] "AKT1S1" "RPS6KB1" "RPS6KB2" "ULK1" "RB1CC1" "SUPT20H"
## [73] "SMCR8" "WDR41" "C9orf72" "RAB8A" "RAB39B" "SQSTM1"
## [79] "TBK1" "TANK" "RAB1A" "RRAGA" "RRAGB" "RRAGC"
## [85] "RRAGD" "STK11" "PRKAA1" "PRKAA2" "PRKACA" "PRKACB"
## [91] "PRKACG" "PRKCD" "MAPK8" "MAPK10" "MAPK9" "BCL2"
## [97] "BCL2L1" "BAD" "NRBF2" "ATG14" "AMBRA1" "TRAF6"
## [103] "TP53INP2" "VMP1" "STX17" "UVRAG" "SH3GLB1" "RUBCN"
## [109] "CAMKK2" "MAP3K7" "ERN1" "ITPR1" "DAPK1" "DAPK3"
## [115] "DAPK2" "HMGB1" "PRAP1" "EIF2AK3" "EIF2AK4" "EIF2S1"
## [121] "MTMR3" "MTMR4" "MTMR14" "ZFYVE1" "ATG16L2" "CFLAR"
## [127] "RAB33B" "PRKCQ" "LAMP1" "LAMP2" "RAB7A" "RAB7B"
## [133] "VAMP8" "SNAP29" "CTSD" "CTSL" "CTSB"
ATG7_res_ATG7_only_signif[ATG7_res_ATG7_only_signif$Description %in% unique(autophagy_gene_info$gene_name),]ATG7_res_ATG7_only %>%
filter(Description == "NFE2L2")load("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/20_controls/ATG7_res_all.rda")
load("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/20_controls/gene_count_corrected_example.rda")
load("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/20_controls/gene_count_ods.rda")knitr::include_graphics("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/20_controls/FPKM.png")knitr::include_graphics("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/20_controls/expressed_genes.png")knitr::include_graphics("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/20_controls/count_cor_pre_correction.png")knitr::include_graphics("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/20_controls/count_gene_sample_pre_correction.png")plotEncDimSearch(gene_count_ods)Here, I have included one example. In reality, there's plots like these for each ATG7 sample as I have re-run each ATG7 with the other ATG7 samples removed.
knitr::include_graphics("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/20_controls/count_cor_post_correction_M0920.18.png")knitr::include_graphics("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/20_controls/count_gene_sample_post_correction_M0920.18.png")which_ATG7 <- which(colData(gene_count_ods)[["gene_name"]] == "ATG7")
ATG7_samp_ids <- colData(gene_count_ods)[["samp_id_tidy"]][which_ATG7] %>%
as.character()
stopifnot(all(ATG7_samp_ids %in% ATG7_res_all$sampleID))
ATG7_res_ATG7_only <- ATG7_res_all %>%
filter(sampleID %in% ATG7_samp_ids) %>%
mutate(padjust = p.adjust(pValue, method = "fdr")) %>%
dplyr::select(Description, everything())ATG7_res_ATG7_only_signif <- ATG7_res_ATG7_only %>%
filter(padjust <= 0.05) %>%
dplyr::select(Description, everything())
ATG7_res_ATG7_only_signifATG7_res_ATG7_only_signif$sampleID %>% table()## .
## M0921.18
## 251
ATG7_res_ATG7_only %>%
filter(Description == "ATG7")signif_genes_remaining_samp <- ATG7_res_all %>%
filter(!(sampleID %in% ATG7_samp_ids)) %>%
mutate(padjust = p.adjust(pValue, method = "fdr")) %>%
filter(padjust <= 0.05) %>%
.[["Description"]]
mean(unique(ATG7_res_ATG7_only_signif$Description) %in% unique(signif_genes_remaining_samp))## [1] 0.4780876
hits_gprofiler <-
gprofiler2::gost(query = unique(ATG7_res_ATG7_only_signif %>%
arrange(padjust) %>%
.[["geneID"]]),
organism = "hsapiens",
ordered_query = F,
significant = T,
user_threshold = 0.05,
correction_method = c("g_SCS"),
domain_scope = c("custom"),
custom_bg = unique(ATG7_res_all$geneID),
sources = c("GO", "KEGG", "REAC"),
evcodes = F)
hits_gprofiler$resultgprofiler2::gostplot(hits_gprofiler)ATG7_res_ATG7_only_signif[ATG7_res_ATG7_only_signif$Description %in% unique(autophagy_gene_info$gene_name),]ATG7_res_ATG7_only %>%
filter(Description == "NFE2L2")load(file = "/home/dzhang/projects/ATG7_rob_t_analysis/results/DESeq2/ATG7_deseq2_res.rda")load(file = "/home/dzhang/projects/ATG7_rob_t_analysis/results/DESeq2/ATG7_deseq2_res.rda")
ATG7_deseq2_res %>%
filter(Description == "ATG7")load("/home/dzhang/projects/ATG7_rob_t_analysis/results/OUTRIDER/50_controls/ATG7_res_all.rda")
hits_gprofiler <-
gprofiler2::gost(query = ATG7_deseq2_res %>%
arrange(pvalue) %>%
.[["Description"]],
organism = "hsapiens",
ordered_query = T,
significant = T,
user_threshold = 0.05,
correction_method = c("g_SCS"),
domain_scope = c("custom"),
custom_bg = unique(ATG7_res_all$geneID),
sources = c("GO", "KEGG", "REAC"),
evcodes = F)
hits_gprofiler$result %>%
as_tibble() gprofiler2::gostplot(hits_gprofiler)load(file = "/home/dzhang/projects/ATG7_rob_t_analysis/results/DESeq2/COL6A_deseq2_res.rda")
hits_gprofiler <-
gprofiler2::gost(query = COL6A_deseq2_res %>%
arrange(pvalue) %>%
.[["Description"]],
organism = "hsapiens",
ordered_query = T,
significant = T,
user_threshold = 0.05,
correction_method = c("g_SCS"),
domain_scope = c("custom"),
custom_bg = unique(ATG7_res_all$geneID),
sources = c("GO", "KEGG", "REAC"),
evcodes = F)
hits_gprofiler$result %>%
as_tibble()gprofiler2::gostplot(hits_gprofiler)